fredr_set_key("2198d4f222b205f8ad6395cc9ec48a9d")
#Load Data
params <- list(
series_id = c("UNRATE",'PCEPILFE',
'EXPINF2YR','T10Y3M',
'GDPC1', 'GDPPOT',
'FEDFUNDS'),
frequency = c('q','q','q','q','q','q','q')
)
ps3_data <- purrr::pmap_dfr(
.l = params,
.f = ~ fredr(series_id = .x, frequency = .y)
)
ps3_data <- ps3_data %>%
dplyr::select(date, series_id, value) %>%
pivot_wider(names_from = series_id,
values_from = value) %>%
as_tsibble(index = date) %>%
filter(year(date) %in% 1987:2015) %>%
mutate(OUTPUTGAP = ((GDPC1 - GDPPOT)/GDPPOT)*100)

Determine the order of integration for each variable. Provide some evidence (e.g. maybe a graph of each variable and the intord output for one of them along with ADF test results) and your conclusions.

  1. UNRATE

Based on the intord function, the unemployment rate appears to be difference stationary. The graph of the first difference looks stationary, the standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference and the ACF declines faster at the first difference than at the levels.

ggplot(ps3_data, aes(x=date, y=UNRATE)) +
  geom_line() +
  ggtitle("Graph of Unemployment Rate")

intord(ps3_data$UNRATE)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.83           -4.17         REJ         REJ
## 2    5%      -2.89           -2.83           -4.17         FTR         REJ
## 3    1%      -3.50           -2.83           -4.17         FTR         REJ
  1. PCEPILFE: PCE Excluding Food and Energy

The series clearly has a trend, however, when we removed the trend the data still does not appear stationary. The first difference from the detrended data does appear stationary however. The standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference.

ggplot(ps3_data, aes(x=date, y=PCEPILFE)) +
  geom_line() +
  ggtitle("Graph of PCE Excluding Food and Energy")

intord(ps3_data$PCEPILFE)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.15           -6.44         FTR         REJ
## 2    5%      -2.89           -1.15           -6.44         FTR         REJ
## 3    1%      -3.50           -1.15           -6.44         FTR         REJ
model <- lm(PCEPILFE ~ date, data = ps3_data)
detrended_PCEPILFE <- ps3_data %>%
  mutate(detrended_PCEPILFE = PCEPILFE - predict(model))

ggplot(detrended_PCEPILFE, aes(x=date, y=detrended_PCEPILFE)) +
  geom_line() +
  ggtitle("Graph of Detrended PCE Excluding Food and Energy")

intord(detrended_PCEPILFE$detrended_PCEPILFE)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.29           -6.47         FTR         REJ
## 2    5%      -2.89           -1.29           -6.47         FTR         REJ
## 3    1%      -3.50           -1.29           -6.47         FTR         REJ
  1. EXPINF2YR: Expected Inflation - 2 Years

The series is stationary at the first difference based on evidence from the intord function.

ggplot(ps3_data, aes(x=date, y=EXPINF2YR)) +
  geom_line() +
  ggtitle("XPINF2YR: Expected Inflation - 2 Years")

intord(ps3_data$EXPINF2YR)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.69          -10.19         FTR         REJ
## 2    5%      -2.89           -1.69          -10.19         FTR         REJ
## 3    1%      -3.50           -1.69          -10.19         FTR         REJ
  1. T10Y3M: Interest rate spread 10 year versus 3 month.

The series is difference stationary according to the intord function. The graph of the first difference looks stationary, the standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference and the ACF declines faster at the first difference than at the levels.

ggplot(ps3_data, aes(x=date, y=T10Y3M)) +
  geom_line() +
  ggtitle("Interest rate spread 10 year versus 3 month.")

intord(ps3_data$T10Y3M)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58            -3.1           -6.62         REJ         REJ
## 2    5%      -2.89            -3.1           -6.62         REJ         REJ
## 3    1%      -3.50            -3.1           -6.62         FTR         REJ
  1. GDPC1: Real Gross Domestic Product

The data has a clear trend and appears stationary when detrended. The graph of the first difference looks stationary, the standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference and the ACF declines faster at the first difference than at the levels. Thus, the data is trend stationary which is also apparent when we take the first difference.

ggplot(ps3_data, aes(x=date, y=GDPC1)) +
  geom_line() +
  ggtitle("Real Gross Domestic Product")

intord(ps3_data$GDPC1)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -0.24            -6.8         FTR         REJ
## 2    5%      -2.89           -0.24            -6.8         FTR         REJ
## 3    1%      -3.50           -0.24            -6.8         FTR         REJ
model <- lm(GDPC1 ~ date, data = ps3_data)
detrended_GDPC1 <- ps3_data %>%
  mutate(detrended_GDPC1 = GDPC1 - predict(model))

ggplot(detrended_GDPC1, aes(x=date, y=detrended_GDPC1)) +
  geom_line() +
  ggtitle("Graph of Detrended Real Gross Domestic Product")

  1. GDPPOT: Real Potential Gross Domestic Product

The data has a clear trend and appears stationary when detrended. The graph of the first difference looks stationary, the standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference and the ACF declines faster at the first difference than at the levels. Thus, the data is trend stationary which is also apparent when we take the first difference.

ggplot(ps3_data, aes(x=date, y=GDPPOT)) +
  geom_line() +
  ggtitle("Real Potential Gross Domestic Product")

intord(ps3_data$GDPPOT)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -0.71           -2.64         FTR         REJ
## 2    5%      -2.89           -0.71           -2.64         FTR         FTR
## 3    1%      -3.50           -0.71           -2.64         FTR         FTR
model <- lm(GDPPOT ~ date, data = ps3_data)
detrended_GDPPOT <- ps3_data %>%
  mutate(detrended_GDPPOT = GDPPOT - predict(model))

ggplot(detrended_GDPPOT, aes(x=date, y=detrended_GDPPOT)) +
  geom_line() +
  ggtitle("Graph of Detrended Real Potential Gross Domestic Product")

  1. FEDFUNDS: Effective Federal Funds Rate

The data is not stationary at its levels but is stationary at the first difference. The graph of the first difference looks stationary, the standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference and the ACF declines faster at the first difference than at the levels.

ggplot(ps3_data, aes(x=date, y=FEDFUNDS)) +
  geom_line() +
  ggtitle("Effective Federal Funds Rate")

intord(ps3_data$FEDFUNDS)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.35           -4.36         FTR         REJ
## 2    5%      -2.89           -2.35           -4.36         FTR         REJ
## 3    1%      -3.50           -2.35           -4.36         FTR         REJ
  1. OUTPUTGAP: ((GDPC1-GDPPOT)/GDPPOT)*100

The data appears to be stationary at its levels. However, the graph of the first difference looks more stationary than the levels, the standard deviation falls by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first difference and the ACF declines faster at the first difference than at the levels. Thus, the data is difference stationary.

ggplot(ps3_data, aes(x=date, y=OUTPUTGAP)) +
  geom_line() +
  ggtitle("Output Gap")

intord(ps3_data$OUTPUTGAP)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.18           -7.21         FTR         REJ
## 2    5%      -2.89           -2.18           -7.21         FTR         REJ
## 3    1%      -3.50           -2.18           -7.21         FTR         REJ
  1. Separate your data into a training set and test set. The test set should have 4 observations
# 1. Unemployment Rate (UNRATE)
UNRATE <- ts(ps3_data$UNRATE, start = c(1987, 1), freq = 4)
train_unrate <- ts(UNRATE[1:112], start = c(1987, 1), freq = 4)
test_unrate <- ts(UNRATE[113:116], start = c(2015, 1), freq = 4)

# 2. PCE Excluding Food and Energy (PCEPILFE)
PCEPILFE <- ts(ps3_data$PCEPILFE, start = c(1987, 1), freq = 4)
train_pce <- ts(PCEPILFE[1:112], start = c(1987, 1), freq = 4)
test_pce <- ts(PCEPILFE[113:116], start = c(2015, 1), freq = 4)

# 3. Expected Inflation - 2 Years (EXPINF2YR)
EXPINF2YR <- ts(ps3_data$EXPINF2YR, start = c(1987, 1), freq = 4)
train_expinf <- ts(EXPINF2YR[1:112], start = c(1987, 1), freq = 4)
test_expinf <- ts(EXPINF2YR[113:116], start = c(2015, 1), freq = 4)

# 4. Interest rate spread 10 year versus 3 month (T10Y3M)
T10Y3M <- ts(ps3_data$T10Y3M, start = c(1987, 1), freq = 4)
train_t10y3m <- ts(T10Y3M[1:112], start = c(1987, 1), freq = 4)
test_t10y3m <- ts(T10Y3M[113:116], start = c(2015, 1), freq = 4)

# 5. Real Gross Domestic Product (GDPC1)
GDPC1 <- ts(ps3_data$GDPC1, start = c(1987, 1), freq = 4)
train_gdp <- ts(GDPC1[1:112], start = c(1987, 1), freq = 4)
test_gdp <- ts(GDPC1[113:116], start = c(2015, 1), freq = 4)

# 6. Real Potential Gross Domestic Product (GDPPOT)
GDPPOT <- ts(ps3_data$GDPPOT, start = c(1987, 1), freq = 4)
train_gdppot <- ts(GDPPOT[1:112], start = c(1987, 1), freq = 4)
test_gdppot <- ts(GDPPOT[113:116], start = c(2015, 1), freq = 4)

# 7. Effective Federal Funds Rate (FEDFUNDS)
FEDFUNDS <- ts(ps3_data$FEDFUNDS, start = c(1987, 1), freq = 4)
train_fedfunds <- ts(FEDFUNDS[1:112], start = c(1987, 1), freq = 4)
test_fedfunds <- ts(FEDFUNDS[113:116], start = c(2015, 1), freq = 4)

# 8. Output Gap (((GDPC1 - GDPPOT) / GDPPOT) * 100)
OUTPUTGAP <- ts(((GDPC1 - GDPPOT) / GDPPOT) * 100, start = c(1987, 1), freq = 4)
train_outputgap <- ts(OUTPUTGAP[1:112], start = c(1987, 1), freq = 4)
test_outputgap <- ts(OUTPUTGAP[113:116], start = c(2015, 1), freq = 4)
  1. Build a dynamic regression model of FEDFUNDS that includes what you think is the optimal number of lags for each of the other variable (and the dependent variable). Be sure to consider any evidence of serial correlation in the error term. (As a note, you can comment on possible serial correlation at the seasonal lags but don’t worry about including seasonality in your model). Provide your best model and explanation of the steps you took to obtain your best model.

I selected this specific model by first thinking about what variables could have an impact on the federal funds rate. Then, I ran a few different combinations and looked at the AIC to determine which model I use. I decided to use AIC since there is likely not a best model to fit this data and a larger set of covariates will need to be included.

modeling_data <- ps3_data %>%
  mutate(dUNRATE = difference(UNRATE),
         dEXPINF2YR = difference(EXPINF2YR),
         dPCEPILFE = difference(PCEPILFE),
         dT10Y3M = difference(T10Y3M),
         dOUTPUTGAP = difference(OUTPUTGAP),
         dFFR = difference(FEDFUNDS)) %>%
  mutate(across(c(dUNRATE, dEXPINF2YR,dPCEPILFE, dT10Y3M, dOUTPUTGAP, dFFR),
                list(lag_1 = ~ lag(.x, 1),
                     lag_2 = ~ lag(.x, 2),
                     lag_3 = ~ lag(.x, 3),
                     lag_4 = ~ lag(.x, 4)))) %>%
  filter(complete.cases(.)) 


fed_funds_mod <- lm(dFFR ~ dFFR_lag_1 + dEXPINF2YR + dEXPINF2YR_lag_1 + dPCEPILFE +
                    dPCEPILFE_lag_1 + dOUTPUTGAP + dOUTPUTGAP_lag_1 +
                    dT10Y3M + dUNRATE + dUNRATE_lag_1,
                    data = modeling_data)

summary(fed_funds_mod)
## 
## Call:
## lm(formula = dFFR ~ dFFR_lag_1 + dEXPINF2YR + dEXPINF2YR_lag_1 + 
##     dPCEPILFE + dPCEPILFE_lag_1 + dOUTPUTGAP + dOUTPUTGAP_lag_1 + 
##     dT10Y3M + dUNRATE + dUNRATE_lag_1, data = modeling_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40101 -0.13327 -0.00441  0.11164  0.45199 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.04575    0.06450   0.709   0.4797    
## dFFR_lag_1        0.31182    0.05952   5.239 8.98e-07 ***
## dEXPINF2YR        0.81027    0.08233   9.842  < 2e-16 ***
## dEXPINF2YR_lag_1  0.02027    0.08015   0.253   0.8008    
## dPCEPILFE        -0.03281    0.18579  -0.177   0.8602    
## dPCEPILFE_lag_1  -0.14473    0.18259  -0.793   0.4299    
## dOUTPUTGAP        0.10742    0.04184   2.568   0.0117 *  
## dOUTPUTGAP_lag_1  0.01556    0.04314   0.361   0.7191    
## dT10Y3M          -0.56192    0.05416 -10.376  < 2e-16 ***
## dUNRATE           0.02610    0.11412   0.229   0.8196    
## dUNRATE_lag_1    -0.01333    0.09700  -0.137   0.8910    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.191 on 100 degrees of freedom
## Multiple R-squared:  0.8384, Adjusted R-squared:  0.8222 
## F-statistic: 51.87 on 10 and 100 DF,  p-value: < 2.2e-16
AIC(fed_funds_mod)
## [1] -40.10651
ggAcf(fed_funds_mod$residuals, lag.max = 24) + theme_bw()

ggPacf(fed_funds_mod$residuals, lag.max = 24) + theme_bw()

  1. Choose an appropriate loss function and predict the test set and determine how well your prediction fits the test set
fitted_values <- predict(fed_funds_mod, newdata = modeling_data)
ffer <- ts(fitted_values, start = c(1987, 1), freq = 4)
forecasted_values_ffer <- forecast(ffer, h = 4)


mse <- mean((forecasted_values_ffer$mean-test_fedfunds)^2)
mse
## [1] 0.04478109
  1. Test each variable in your final model for Granger Causality, i.e. test whether each variable has any effect on FEDFUNDS over time.
independent_vars <- c("dFFR_lag_1", "dEXPINF2YR", "dEXPINF2YR_lag_1", 
                      "dPCEPILFE", "dPCEPILFE_lag_1", "dOUTPUTGAP", 
                      "dOUTPUTGAP_lag_1", "dT10Y3M", "dUNRATE_lag_1")


significant_vars <- c()

for (var in independent_vars) {

  formula <- as.formula(paste("FEDFUNDS ~ ", var))
  

  granger_test <- grangertest(formula, order = 2, data = modeling_data)
  
  cat("Granger Causality Test for", var, "vs FEDFUNDS:\n")
  print(granger_test)
  
 
  if (granger_test$`Pr(>F)`[2] < 0.05) {
    significant_vars <- c(significant_vars, var) 
    
  }
}
## Granger Causality Test for dFFR_lag_1 vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dFFR_lag_1, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F Pr(>F)
## 1    104                 
## 2    106 -2 0.6498 0.5242
## Granger Causality Test for dEXPINF2YR vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dEXPINF2YR, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F Pr(>F)
## 1    104                 
## 2    106 -2 0.5577 0.5742
## Granger Causality Test for dEXPINF2YR_lag_1 vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dEXPINF2YR_lag_1, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F Pr(>F)
## 1    104                 
## 2    106 -2 2.0038   0.14
## Granger Causality Test for dPCEPILFE vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dPCEPILFE, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F Pr(>F)
## 1    104                 
## 2    106 -2 1.0897 0.3401
## Granger Causality Test for dPCEPILFE_lag_1 vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dPCEPILFE_lag_1, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F Pr(>F)
## 1    104                 
## 2    106 -2 0.0769 0.9261
## Granger Causality Test for dOUTPUTGAP vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dOUTPUTGAP, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F  Pr(>F)  
## 1    104                    
## 2    106 -2 3.7345 0.02715 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Granger Causality Test for dOUTPUTGAP_lag_1 vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dOUTPUTGAP_lag_1, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df      F Pr(>F)
## 1    104                 
## 2    106 -2 2.1991  0.116
## Granger Causality Test for dT10Y3M vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dT10Y3M, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df     F  Pr(>F)  
## 1    104                   
## 2    106 -2 2.448 0.09143 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Granger Causality Test for dUNRATE_lag_1 vs FEDFUNDS:
## Granger causality test
## 
## Model 1: FEDFUNDS ~ Lags(FEDFUNDS, 1:2) + Lags(dUNRATE_lag_1, 1:2)
## Model 2: FEDFUNDS ~ Lags(FEDFUNDS, 1:2)
##   Res.Df Df     F Pr(>F)
## 1    104                
## 2    106 -2 0.434 0.6491
cat("Significant variables based on Granger causality test:", significant_vars, "\n")
## Significant variables based on Granger causality test: dOUTPUTGAP
  1. Using the restricted model(s) from your Granger Causality test, predict the test set and calculate the appropriate loss function. Comment on which model predicts better according to your chosen loss function.

The restricted model better predicts according to MSE.

fed_funds_mod_res <- lm(dFFR ~ dOUTPUTGAP,
                    data = modeling_data)

summary(fed_funds_mod_res)
## 
## Call:
## lm(formula = dFFR ~ dOUTPUTGAP, data = modeling_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17302 -0.17370  0.03594  0.20058  0.94704 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.05553    0.03956  -1.403    0.163    
## dOUTPUTGAP   0.32369    0.07073   4.577 1.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4168 on 109 degrees of freedom
## Multiple R-squared:  0.1612, Adjusted R-squared:  0.1535 
## F-statistic: 20.94 on 1 and 109 DF,  p-value: 1.259e-05
ggAcf(fed_funds_mod_res$residuals, lag.max = 24) + theme_bw()

fitted_values_res <- predict(fed_funds_mod_res, newdata = modeling_data)

ffer_res <- ts(fitted_values_res, start = c(1987, 1), freq = 4)
forecasted_values_ffer_res <- forecast(ffer_res, h = 4)


mse <- mean((forecasted_values_ffer_res$mean-test_fedfunds)^2)
mse
## [1] 0.02754693

Question 2

Starting with the data in the aggmacro_data dataframe, complete the following:

  1. Using both the Engle-Granger method and a Johansen test determine if there is any evidence of cointegration between the variables (in log form) unemployment (UNEMP), CPI, and income (TOTPERSINC). Provide some evidence and clearly state your conclusions.Note: For the EG method I just want you to do the first stage and determine which variables are cointegrated.

Log of CPI is an I(1) process. The series is not stationary at the levels but is at the first difference. We can reject the null of non-stationarity for the ADF test at the first difference and the process appears mean reverting and homoskedastic when plotted with one difference.

Log of unemployment is an I(2) process. The series is not stationary at levels or the first difference but appears mean reverting and homoskedastic when differenced twice.

The log of total personal income is an I(1) process. The plot of the levels is clearly non-stationary as it has a trend but the first differenced data appears stationary and we can reject the null of non-stationarity for the ADF test for the first difference.

q2 <- read.csv("aggmacro.csv")
q2$log_CPI <- log(q2$CPI)
q2$log_TOTPERSINC <- log(q2$TOTPERSINC)
q2$log_HOUSHFINASSET <- log(q2$HOUSHFINASSET)
q2$log_UNEMPL <- log(q2$UNEMP)

#Engle-Granger

intord(q2$log_CPI)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.03           -7.04         FTR         REJ
## 2    5%      -2.89           -1.03           -7.04         FTR         REJ
## 3    1%      -3.49           -1.03           -7.04         FTR         REJ
intord(q2$log_UNEMPL)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.27           -2.09         FTR         FTR
## 2    5%      -2.89           -1.27           -2.09         FTR         FTR
## 3    1%      -3.49           -1.27           -2.09         FTR         FTR
intord(q2$log_TOTPERSINC)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.46           -5.46         FTR         REJ
## 2    5%      -2.89           -1.46           -5.46         FTR         REJ
## 3    1%      -3.49           -1.46           -5.46         FTR         REJ

The log of CPI and the log of total personal income are both I(1) processes, I will now use the Engle-Granger method and the Johansen test to look for cointegration.

Using the Engle-Granger method, we fail to reject the null of non-stationarity at the levels for the ADF test, meaning that the combination of these series is not an I(0) process according to that test. Moreover, the ACF is much faster declining at the first difference than the levels, also providing evidence that the combined series is still an I(1). However, the graph of the levels does appear stationary.

The Johansen test finds no cointegrating relationship between the log of CPI and the log of personal income as the test statistic is smaller than the critical values.

#Engle-Granger
eg <- lm(log_CPI~log_TOTPERSINC - 1, data = q2)$residuals
intord(eg)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.91           -9.14         FTR         REJ
## 2    5%      -2.89           -1.91           -9.14         FTR         REJ
## 3    1%      -3.49           -1.91           -9.14         FTR         REJ
# Johansen Test
joh_test <- ca.jo(x = q2[, c('log_CPI', 'log_TOTPERSINC')])                  
summary(joh_test)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.072070192 0.004611693
## 
## Values of teststatistic and critical values of test:
## 
##          test 10pct  5pct  1pct
## r <= 1 | 0.55  6.50  8.18 11.65
## r = 0  | 8.83 12.91 14.90 19.19
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                   log_CPI.l2 log_TOTPERSINC.l2
## log_CPI.l2         1.0000000          1.000000
## log_TOTPERSINC.l2 -0.5819867         -4.643651
## 
## Weights W:
## (This is the loading matrix)
## 
##                   log_CPI.l2 log_TOTPERSINC.l2
## log_CPI.d        -0.07746758      4.730235e-05
## log_TOTPERSINC.d  0.00507093      7.604457e-04

Question 3

Starting with the data in the cintiemp_data dataframe, complete the following: (i) Using both the Engle-Granger method and a Johansen test determine if there is any evidence of cointegration between the variables (in log form) employment, labor force, and unemployment. Provide some evidence and clearly state your conclusions. Note: For the EG method I just want you to do the first stage and determine which variables are cointegrated.

All three series are I(1) processes. We see evidence that the data is stationary at the first difference. The graph of the first differences looks stationary, the standard deviations fall by over half after differencing, meaning that the decision heuristic supports the conclusion that the data is difference stationary. We can reject the null of non-stationarity for the ADF test at the first differences and the ACF declines faster at the first difference than at the levels.

In order for series to be cointergrated, they need to have the same order of integration that is greater than I(0). Since all three series are I(1), we can test each one against the others for cointegration. The series are cointegrated if the order of integration for the combined series is less than the individual orders of intergration. Thus, for these series, we would need to see that the order of integration of the residuals are an I(0) process.

q3 <- read.csv("cintiemp.csv")

q3$log_employ <- log(q3$employment)
q3$log_labor_force <- log(q3$labor.force)
q3$log_unemp <- log(q3$unemployment)


intord(q3$log_employ)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.92           -1.49         REJ         FTR
## 2    5%      -2.89           -2.92           -1.49         REJ         FTR
## 3    1%      -3.49           -2.92           -1.49         FTR         FTR
intord(q3$log_labor_force)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -1.02            -2.7         FTR         REJ
## 2    5%      -2.89           -1.02            -2.7         FTR         FTR
## 3    1%      -3.49           -1.02            -2.7         FTR         FTR
intord(q3$log_unemp)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.66           -1.75         REJ         FTR
## 2    5%      -2.89           -2.66           -1.75         FTR         FTR
## 3    1%      -3.49           -2.66           -1.75         FTR         FTR

Neither the Engle-Granger nor the Johansen method find evidence of cointegration among the series. When I look at the residuals from the Engle-Granger regressions, I do not see evidence that the residuals are I(0), all the residuals appear difference stationary, making them still I(1). The test statistics from the Johansan test are all smaller than the critical values, meaning there is no evidence of a cointegrating relationship between any of the variables.

#Engle-Granger
eg_1 <- lm(log_employ~log_labor_force - 1, data = q3)$residuals
intord(eg_1)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -3.09           -1.74         REJ         FTR
## 2    5%      -2.89           -3.09           -1.74         REJ         FTR
## 3    1%      -3.49           -3.09           -1.74         FTR         FTR
eg_2 <- lm(log_employ~log_unemp - 1, data = q3)$residuals
intord(eg_2)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.76           -1.73         REJ         FTR
## 2    5%      -2.89           -2.76           -1.73         FTR         FTR
## 3    1%      -3.49           -2.76           -1.73         FTR         FTR
eg_3 <- lm(log_labor_force~log_unemp - 1, data = q3)$residuals
intord(eg_3)

## $test_results
##   level crit_value ADF_round1_stat ADF_round2_stat round_1_dec round_2_dec
## 1   10%      -2.58           -2.74           -1.73         REJ         FTR
## 2    5%      -2.89           -2.74           -1.73         FTR         FTR
## 3    1%      -3.49           -2.74           -1.73         FTR         FTR
# Johansen Test
joh_test_2 <- ca.jo(x = q3[, c('log_employ', 'log_labor_force','log_unemp')])                  
summary(joh_test_2)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: maximal eigenvalue statistic (lambda max) , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.09110413 0.07070039 0.01312992
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 |  1.72  6.50  8.18 11.65
## r <= 1 |  9.53 12.91 14.90 19.19
## r = 0  | 12.42 18.90 21.07 25.75
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##                    log_employ.l2 log_labor_force.l2 log_unemp.l2
## log_employ.l2         1.00000000         1.00000000    1.0000000
## log_labor_force.l2   38.75327175        -1.05362808   -1.0377341
## log_unemp.l2         -0.07553971         0.07169377    0.1149686
## 
## Weights W:
## (This is the loading matrix)
## 
##                   log_employ.l2 log_labor_force.l2 log_unemp.l2
## log_employ.d       -0.003676700       -0.076889969  0.001266009
## log_labor_force.d  -0.002337771        0.007614527 -0.035040169
## log_unemp.d         0.016778859       -0.736126734 -0.586537588